package com.wxg.demo;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Jsoup demo: scans a directory of HTML files and collects the names of the
 * tags found inside each document body, split into two groups according to
 * {@link Element#isBlock()}.
 * 
 * @author Wxg
 *
 */
public class HowManyBlockTags {

	/** Directory scanned by {@link #code101()} when no other path is given. */
	private static final String DEFAULT_DIR_PATH = "F:\\wxg\\for_print\\myjfserver\\src\\main\\webapp\\uehtml";

	/** Name of the file that {@link #code101(String)} leaves out of the scan. */
	private static final String EXCLUDED_FILE_NAME = "myindex.html";

	/** Tag names for which {@link Element#isBlock()} returned {@code true}. */
	public static Set<String> tagNameSet = new HashSet<String>();
	/** Tag names for which {@link Element#isBlock()} returned {@code false}. */
	public static Set<String> otherTagSet = new HashSet<String>();
	
	/**
	 * Runs the scan on the directory named by the first argument, or on the
	 * built-in default directory when no argument is supplied.
	 * 
	 * @param args optional; {@code args[0]} is the directory to scan
	 */
	public static void main(String[] args) {
		if(args != null && args.length > 0){
			code101(args[0]);
		}else{
			code101();
		}
	}
	
	/**
	 * Scans the default directory; see {@link #code101(String)}.
	 * 
	 * <p>Result recorded by the original author for one run over that
	 * directory (the order within each group is not significant):</p>
	 * <pre>
	 * block : p, td, tbody, hr, table, tr
	 * other : br, sub, strong, span
	 * </pre>
	 */
	public static void code101(){
		code101(DEFAULT_DIR_PATH);
	}
	
	/**
	 * Collects the tag names of every regular file in the given directory
	 * (except {@code myindex.html}) and prints them to standard output: first
	 * the block tag names, then a line containing {@code ===}, then the other
	 * tag names, one name per line.
	 * 
	 * <p>Both {@link #tagNameSet} and {@link #otherTagSet} are replaced by
	 * fresh sets before the scan starts. A file that cannot be parsed is
	 * reported on standard error and skipped; the remaining files are still
	 * processed. If the path is not a directory, a message is written to
	 * standard error and nothing else happens.</p>
	 * 
	 * @param dirpath path of the directory to scan; must not be {@code null}
	 */
	public static void code101(String dirpath){
		File dir = new File(dirpath);
		if(!dir.isDirectory()){
			System.err.println("Not a directory: " + dirpath);
			return;
		}
		
		tagNameSet = new HashSet<String>();
		otherTagSet = new HashSet<String>();
		
		// listFiles() returns null when the directory cannot be read.
		File[] fileList = dir.listFiles();
		if(fileList == null){
			System.err.println("Cannot list directory: " + dirpath);
			fileList = new File[0];
		}
		
		for(File file : fileList){
			// Sub-directories and other non-regular entries cannot be parsed as HTML.
			if(!file.isFile() || EXCLUDED_FILE_NAME.equals(file.getName())){
				continue;
			}
			try {
				storeTagName(file);
			} catch (Exception e) {
				// Best effort: one bad file must not abort the whole scan.
				System.err.println("Skipping file that could not be processed: " + file);
				e.printStackTrace();
			}
		}
		
		for(String str : tagNameSet){
			System.out.println(str);
		}
		System.out.println("===");
		for(String str : otherTagSet){
			System.out.println(str);
		}
	}
	
	/**
	 * Adds the tag name of every element inside the body of the given file to
	 * {@link #tagNameSet} when {@link Element#isBlock()} is {@code true} for
	 * it, and to {@link #otherTagSet} otherwise. The {@code body} element
	 * itself is not recorded.
	 * 
	 * @param file HTML file to parse (read as UTF-8)
	 * @throws IOException if the file cannot be read
	 */
	public static void storeTagName(File file) throws IOException {
		Element body = parseBody(file);
		for(Element ele : body.getAllElements()){
			String name = ele.tagName();
			// getAllElements() also yields the body element itself; leave it out.
			if("body".equalsIgnoreCase(name)){
				continue;
			}
			if(ele.isBlock()){
				tagNameSet.add(name);
			}else{
				otherTagSet.add(name);
			}
		}
	}
	
	/**
	 * Prints the tag name and outer HTML of every element returned by
	 * {@code body.getAllElements()}, except the {@code body} element itself.
	 * 
	 * @param file HTML file to parse (read as UTF-8)
	 * @throws IOException if the file cannot be read
	 */
	public static void printAllChildren2(File file) throws IOException{
		Elements eles = parseBody(file).getAllElements();
		for(Element ele : eles){
			if(!"body".equalsIgnoreCase(ele.tagName())){
				System.out.println(ele.tagName());
				System.out.println( ele.outerHtml() );
			}
		}
	}
	
	/**
	 * Prints the tag name and outer HTML of every element returned by
	 * {@code body.children()}.
	 * 
	 * @param file HTML file to parse (read as UTF-8)
	 * @throws IOException if the file cannot be read
	 */
	public static void printAllChildren(File file) throws IOException{
		Elements eles = parseBody(file).children();
		for(Element ele : eles){
			System.out.println(ele.tagName());
			System.out.println( ele.outerHtml() );
		}
	}
	
	/**
	 * Parses the given file as UTF-8 HTML and returns its body element.
	 * 
	 * @param file HTML file to parse
	 * @return the body element of the parsed document
	 * @throws IOException if the file cannot be read
	 */
	private static Element parseBody(File file) throws IOException {
		Document doc = Jsoup.parse(file, "UTF-8");
		return doc.body();
	}
	
}
